/* Export the non-imputed data for the FHS industries to .csv format for CART imputation. */

%include "ASMimplibs.sas";


/*
  select_and_export

  Builds the "good" (non-imputed) analysis file for one industry and writes it
  out as CSV, pooled and separately for 2002 and 2007.

  Parameters:
    ind   - industry tag. Input is fhs.phy<ind>f; every output data set and
            CSV file is named <ind>_gooddata*.
    pqsfl - when equal to 1, pqs is forced to 0 on all year = 2007 rows;
            any other value leaves pqs as computed.

  Outputs:
    work.<ind>_gooddata, work.<ind>_gooddata02, work.<ind>_gooddata07
        - the data exported to CSV (identifiers and zero flags dropped;
          the pooled file keeps year).
    fhs.<ind>_gooddata_ids, fhs.<ind>_gooddata_ids02, fhs.<ind>_gooddata_ids07
        - row number (number), survu_id and the zero flags, kept so the
          exported rows can be re-merged by position later.
    <ind>_gooddata.csv, <ind>_gooddata02.csv, <ind>_gooddata07.csv
*/
%macro select_and_export(ind,pqsfl);

 /* Zero-flag variables, in the order they are created in the data step. */
 %local zflags;
 %let zflags = tvs_zerof cm_zerof cf_zerof ee_zerof tab_zerof ph_zerof pw_zerof ww_zerof;

 data &ind._gooddata (keep = survu_id year tvs pqs pv cm cf ee tab sw nonpw ph ww pw exit02
                      &zflags);
   set fhs.phy&ind.f;

   /* Parallel arrays: element k of each array refers to the same item. */
   array a_val  {*} tvs     cm     cf     ee     tab     ph     pw     ww;
   array a_imp  {*} tvs_imp cm_imp cf_imp ee_imp tab_imp ph_imp pw_imp ww_imp;
   array a_zero {*} &zflags;

   /* Step 1: initialise the zero flags and blank out imputed values. */
   do _k = 1 to dim(a_val);
     a_zero{_k} = 0;
     if a_imp{_k} = 1 then a_val{_k} = .;
   end;
   if te_imp = 1 then te = .;

   /* Step 2: derive nonpw from the observed te and pw.
      This has to happen BEFORE the zero recode below: once pw = 0 has been
      replaced by the placeholder 1, te - pw would be understated by one. */
   nonpw = te - pw;

   /* Step 3: replace observed zeros by 1 and remember them in the flag. */
   do _k = 1 to dim(a_val);
     if a_val{_k} = 0 then do;
       a_val{_k}  = 1;
       a_zero{_k} = 1;
     end;
   end;

   /* Drop pqs / pv when the imputed share of the value exceeds one half. */
   if pqs_valimpr > 0.50 then pqs = .;
   if pv_valimpr > 0.50 then pv = .;

   /* Make PQS missing if PV is positive but PQS is zero */
   if pv > 0 and pqs = 0 then pqs = .;

   /* However, for industries with no PQS for most products in 2007,
      set all values of pqs to zero. Since CART only selects variables
      that help to characterize the conditional distribution of other variables,
      PQS should not be selected for these industries when year=2007.
      This also means we will not create imputations for PQS for these
      industries in 2007.
   */
   if year = 2007 and &pqsfl = 1 then pqs = 0;

   /* Discard rows that carry no usable information at all. */
   if tvs=. and (pqs=. or pqs=0) and pv=. and cm=. and cf=. and ee=. and tab=.
      and sw=. and nonpw=. and ph=. and ww=. and pw=. then delete;
 run;


 proc sort data=&ind._gooddata;
   by year survu_id;
 run;

 proc means data=&ind._gooddata N NMISS min mean max;
   by year;
   var tvs pqs pv cm cf ee tab sw nonpw ph ww pw exit02;
   title "Non-imputed and imputed/missing for &ind";
 run;

 /* Save the survu_id values and the zero flags for re-merging after CART imputes. */
 data fhs.&ind._gooddata_ids (keep = number survu_id year &zflags);
   set &ind._gooddata;
   number = _N_;
 run;

 /**  Now do it separately by year: **/

 /* One pass per year writes both the id file and the export file.
    The year filter is a WHERE= data set option (not a subsetting IF) so that
    _N_ counts only the selected rows and number runs 1..k within the year. */
 data fhs.&ind._gooddata_ids02 (keep = number survu_id &zflags)
      &ind._gooddata02         (drop = number survu_id year &zflags);
   set &ind._gooddata (where = (year = 2002));
   number = _N_;
 run;

 data fhs.&ind._gooddata_ids07 (keep = number survu_id &zflags)
      &ind._gooddata07         (drop = number survu_id year &zflags);
   set &ind._gooddata (where = (year = 2007));
   number = _N_;
 run;

 /* Pooled export file: identifiers and flags removed, year retained. */
 data &ind._gooddata (drop = survu_id &zflags);
   set &ind._gooddata;
 run;

 PROC EXPORT DATA= &ind._gooddata
   OUTFILE= "&ind._gooddata.csv"
   DBMS=CSV REPLACE;
   PUTNAMES=YES;
 RUN;

 PROC EXPORT DATA= &ind._gooddata02
   OUTFILE= "&ind._gooddata02.csv"
   DBMS=CSV REPLACE;
   PUTNAMES=YES;
 RUN;

 PROC EXPORT DATA= &ind._gooddata07
   OUTFILE= "&ind._gooddata07.csv"
   DBMS=CSV REPLACE;
   PUTNAMES=YES;
 RUN;

%mend select_and_export;

/* Industries with quantity data for all products in 2002 AND 2007: */
/* Second argument (pqsfl) = 0: pqs is left as computed for every year. */

%select_and_export(floor,0);
%select_and_export(gas,0);
%select_and_export(ice,0);
%select_and_export(sugar,0);

/* Industries with quantity data for all products only in 2002: */
/* Second argument (pqsfl) = 1: the macro sets pqs to 0 on all year = 2007 rows. */

%select_and_export(bread,1);
%select_and_export(carbon,1);
%select_and_export(coffee,1);
%select_and_export(plywood,1);

%select_and_export(boxes,1);

/** Exclude concrete, since we already imputed for concrete for the Syverson replication, and it takes a long time */





